STMトピックモデル

Stucked Area plot

library(stm)
library(dplyr)
library(tidyverse)
library(tidytext)
library(tidystm)
#devtools::install_github("mikajoh/tidystm")
library(ggplot2)
library(ggthemes)
library(sunburstR)

model <- readRDS("data/model_12.obj")
meta <- readRDS("data/meta.obj")
raw <- read.csv("data/tiabList.csv")
data <- na.omit(raw)


prep3 <-stm:: estimateEffect(c(1:12) ~ poly(py,3), model, metadata=meta)

td_beta<-tidy(model)

library(RColorBrewer)
library(plotly)
library(tidystm)
cols<-brewer.pal(12, "Set3")

effect <- extract.estimateEffect(prep3, 'py', 
                                 model = model,
                                 method="pointestimate", 
                                 labeltype="prob")


effect$covariate.value<-as.numeric(effect$covariate.value)

effect%>%filter(covariate.value>1990)->effect2


names(effect2)[2]<-"Topic"

effect2$Topic<-as.factor(effect2$Topic)


g = ggplot(effect2, aes( x = covariate.value, 
                         y = estimate, fill = Topic))

g = g + geom_area(col="white")+
  ylim(0,1.10)+
  xlab("time")+
  ylab("Relative topic prevalence")+
 scale_fill_brewer(palette = "Set3")+
  theme_minimal()+ 
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

#library(htmlwidgets)

plotly::ggplotly(g)
#htmlwidgets::saveWidget(l, "stucked.html")

Sunburst plot

top_terms <- td_beta %>%
  arrange(beta) %>%
  group_by(topic) %>%
  top_n(30, beta) %>% arrange(topic,beta)

sequence<-paste0("Topic ",top_terms$topic,"-",top_terms$term)

dfdf<-data.frame(sequence,prob=top_terms$beta)

dfdf%>%sunburst(explanation =
                  "function(d){return d.data.name}")->p
p
Legend
#htmltools::save_html(p, file = "sunburst.html")

Word Cloud

library(wordcloud2)
library(hwordcloud)
bb<-sort(seq(1,500,12)^3,decreasing = T)
bbn<-length(bb)
topics <- labelTopics(model, 1:12,bbn)


for (i in 1:12) {
  
  topics$frex[i,]
  bbn1<-bbn+1
  frex <- data.frame(words=topics$frex[i,], n=bbn1-seq(topics$frex[i,]), stringsAsFactors=F)
  frex$words <- str_replace_all(frex$words, "_", " ")
  bb<-sort(seq(0.1,2,0.07),decreasing = T)
  clouds <- data.frame(words = c(frex$words), 
                       weight = c(frex$n*bb))
    assign(paste0("wc_", i), hwordcloud(text = clouds$word, 
                                        size = clouds$weight,
                                        width = "100%",
                                        height = "200px",
                                        theme = "darkblue"))
}




wc_1
wc_2
wc_3
wc_4
wc_5
wc_6
wc_7
wc_8
wc_9
wc_10
wc_11
wc_12

LDAvis

temp<-textProcessor(documents=data$ab,metadata=meta)
## Building corpus... 
## Converting to Lower Case... 
## Removing punctuation... 
## Removing stopwords... 
## Removing numbers... 
## Stemming... 
## Creating Output...
toLDAvis(model,temp$documents,out.dir = "jsn")
LDAvis